#######################################
#######################################
###Generate the GAM plots for Fig. 1###
#######################################
#######################################
library(doBy)
library(ggplot2)
library(mgcv)
library(dplyr)
library(interplot)
library(survival)
library(MASS)
library(foreign)
library(stargazer)
library(lmtest)
library(sandwich)
library(multiwayvcov)

###Set working directory
setwd("~/OneDrive - Indiana University/FromGoogle/DeforestationDisease/CodeFinal_12_5_24/")
full.2003.2019<- read.csv("afrogriddiseasejaxa20032019.csv")
summary(full.2003.2019$year)
###Create deforestation indicators
#Contemp
full.2003.2019$deforestation_shock2 <- ifelse(full.2003.2019$jaxa_forest_y_n==1, full.2003.2019$NDVI_mean_anom, 0)
summary(full.2003.2019$deforestation_shock2)
#One month lag
na.cor <- na.omit(full.2003.2019[,c("deforestation_shock2","confirm","year")])
cor(na.cor$confirm,na.cor$deforestation_shock2)

#####################
###Biavariate plot###
#####################
###Subset NA data
#na.cor.short <- subset(na.cor, (deforestation_shock>(-2) & deforestation_shock<2) )
#Plot correlation with GAM
p0 <- ggplot(data = na.cor, aes(x = deforestation_shock2, y = confirm))+
  geom_smooth(color = "darkblue", size = 1, method='gam')+
 # scale_y_continuous( labels=function(x)(((x-0.0004)/0.0004))*100)+
  xlab("Vegetation growth anomalies in forest areas") +
  #ylim(0, 0.0027) +
  ylab("Change in outbreak counts") 
jpeg("figure1a.jpeg", width = 6, height = 6, units = 'in', res = 500)
p0
dev.off()

##################
###Baseline GAM###
##################
###Run model
pois_mod_sep1 <- bam(confirm ~ s(deforestation_shock2, bs = "cr")+
                       log.nl + logged.pop+c_gid, data=full.2003.2019, family="poisson")
summary(pois_mod_sep1)

#Generate test data using model data so it will have NAs removed.
testdata = data.frame(
  deforestation_shock2 = seq((-7.8), 5.4, length = 100),
  log.nl   = mean(pois_mod_sep1$model$log.nl),
  logged.pop = mean(pois_mod_sep1$model$logged.pop),
  c_gid = mean(pois_mod_sep1$model$c_gid)
)
##Create prediction data
predictions = predict(
  pois_mod_sep1,
  newdata = testdata,
  type = 'response',
  se = TRUE
)
#Calculate 95% CIs
df_preds = data.frame(testdata, predictions) %>%
  mutate(lower = fit - 1.96 * se.fit,
         upper = fit + 1.96 * se.fit)
#Plot graph
p1.pois <- ggplot(aes(x = deforestation_shock2, y = fit), data = df_preds) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = 'lightgray') +
  geom_line(color = 'darkblue',size = 1)+
   #scale_y_continuous( labels=function(x)(((x-0.0004)/0.0004))*100)+
  #ylim(-0.0005, 0.0005) +
  xlab("Vegetation growth anomalies in forest areas") +
  ylab("Adjusted change in outbreak counts") 
jpeg("figure1b.jpeg", width = 6, height = 6, units = 'in', res = 500)
p1.pois
dev.off()


##########
###Full###
##########
###Run model
pois_mod_sep2 <- bam(confirm ~ s(deforestation_shock2, bs = "cr")+
                       log.nl + logged.pop +
                       logged_state + logged_nonstate +
                       p_anom+spei+t_anom+f_month2+f_month3+
                       f_month4+f_month5+f_month6+f_month7+f_month8+
                       f_month9 +f_month10+f_month11+
                       f_month12+c_gid, data=full.2003.2019, family="poisson")
summary(pois_mod_sep2)

#Generate test data using model data so it will have NAs removed.
testdata = data.frame(
  deforestation_shock2 = seq((-7.8), 5.4, length = 100),
  log.nl   = mean(pois_mod_sep2$model$log.nl),
  logged.pop = mean(pois_mod_sep2$model$logged.pop),
  p_anom= mean(pois_mod_sep2$model$p_anom),
  spei= mean(pois_mod_sep2$model$spei),
  t_anom= mean(pois_mod_sep2$model$t_anom),
  logged_state =0,
  logged_nonstate =0,
  f_month2=0,
  f_month3 =0,  
  f_month4=0,
  f_month5 =0,
  f_month6=0, 
  f_month7=0,
  f_month8=0,
  f_month9 =0,
  f_month10 =0,
  f_month11 =0,
  f_month12=0,
  c_gid = mean(pois_mod_sep2$model$c_gid)
)

##Create prediction data
predictions = predict(
  pois_mod_sep2,
  newdata = testdata,
  type = 'response',
  se = TRUE
)
#Calculate 95% CIs
df_preds = data.frame(testdata, predictions) %>%
  mutate(lower = fit - 1.96 * se.fit,
         upper = fit + 1.96 * se.fit)
#Plot graph
p2.pois <- ggplot(aes(x = deforestation_shock2, y = fit), data = df_preds) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = 'lightgray') +
  geom_line(color = 'darkblue',size = 1)+
  # scale_y_continuous( labels=function(x)(((x-0.0004)/0.0004))*100)+
  #ylim(-0.0005, 0.0005) +
  xlab("Vegetation growth anomalies in forest areas") +
  ylab("Adjusted change in outbreak counts") 
jpeg("figure1c.jpeg", width = 6, height = 6, units = 'in', res = 500)
p2.pois
dev.off()


###############
###Optimized###
###############
###Run model
pois_mod_sep3 <- bam(confirm ~ s(deforestation_shock2, bs = "cr")+ 
                       log.nl + logged.pop + logged_nonstate + 
                       t_anom + f_month5 + f_month6 + f_month7 + f_month9, 
                     data=full.2003.2019, family="poisson")
summary(pois_mod_sep3)

#Generate test data using model data so it will have NAs removed.
testdata = data.frame(
  deforestation_shock2 = seq((-7.8), 5.4, length = 100),
  log.nl   = mean(pois_mod_sep3$model$log.nl),
  logged.pop = mean(pois_mod_sep3$model$logged.pop),
  t_anom= mean(pois_mod_sep3$model$t_anom),
  logged_nonstate =0,
  f_month5 =0,
  f_month6=0, 
  f_month7=0,
  f_month9 =0
)

##Create prediction data
predictions = predict(
  pois_mod_sep3,
  newdata = testdata,
  type = 'response',
  se = TRUE
)
#Calculate 95% CIs
df_preds = data.frame(testdata, predictions) %>%
  mutate(lower = fit - 1.96 * se.fit,
         upper = fit + 1.96 * se.fit)
#Plot graph
p3.pois <- ggplot(aes(x = deforestation_shock2, y = fit), data = df_preds) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = 'lightgray') +
  geom_line(color = 'darkblue',size = 1)+
  # scale_y_continuous( labels=function(x)(((x-0.0004)/0.0004))*100)+
  #ylim(-0.0005, 0.0005) +
  xlab("Vegetation growth anomalies in forest areas") +
  ylab("Adjusted change in outbreak counts") 
jpeg("figure4d.jpeg", width = 6, height = 6, units = 'in', res = 500)
p3.pois
dev.off()

##################
###Optimized RE###
##################
##Run model
pois_mod_sep4 <- bam(confirm ~ s(deforestation_shock2, bs = "cr")+
                       log.nl + logged.pop + logged_nonstate +
                       t_anom + f_month5 + f_month6 + f_month7 + f_month9+
                       s(gid, bs = "re")+s(month, bs = "re"),
                     data=full.2003.2019, family="poisson", method="REML")
summary(pois_mod_sep4)
plot(pois_mod_sep4)


#Generate test data using model data so it will have NAs removed.
testdata = data.frame(
  deforestation_shock2 = seq((-7.8), 5.4, length = 100),
  log.nl   = mean(pois_mod_sep4$model$log.nl),
  logged.pop = mean(pois_mod_sep4$model$logged.pop),
  t_anom= mean(pois_mod_sep4$model$t_anom),
  logged_nonstate =0,
  f_month5 =0,
  f_month6=0,
  f_month7=0,
  f_month9 =0,
  gid = seq(min(pois_mod_sep4$model$gid), max(pois_mod_sep4$model$gid), length = 100),
  month=seq(min(pois_mod_sep4$model$month), max(pois_mod_sep4$model$month), length = 100)
)

##Create prediction data
predictions = predict(
  pois_mod_sep4,
  newdata = testdata,
  type = 'response',
  se = TRUE
)
#Calculate 95% CIs
df_preds = data.frame(testdata, predictions) %>%
  mutate(lower = fit - 1.96 * se.fit,
         upper = fit + 1.96 * se.fit)
#Plot graph
p4.pois <- ggplot(aes(x = deforestation_shock2, y = fit), data = df_preds) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = 'lightgray') +
  geom_line(color = 'darkblue',size = 1)+
  # scale_y_continuous( labels=function(x)(((x-0.0004)/0.0004))*100)+
  #ylim(-0.0005, 0.0005) +
  xlab("Vegetation growth anomalies in forest areas") +
  ylab("Adjusted change in outbreak counts")
jpeg("figure1e.jpeg", width = 6, height = 6, units = 'in', res = 500)
p4.pois
dev.off()


############
###Output###
############

###Adjust AIC and GCV for full model N
#NA Omit
s.dat <- na.omit(full.2003.2019[,c("confirm", "deforestation_shock2","log.nl","logged.pop","logged_state","logged_nonstate","p_anom","spei","t_anom","f_month2","f_month3","f_month4","f_month5","f_month6","f_month7","f_month8","f_month9","f_month10","f_month11","f_month12","month","c_gid","gid")])
#Socioeconomic
pois_mod_sep1s <- bam(confirm ~ s(deforestation_shock2, bs = "cr")+
                        log.nl + logged.pop+c_gid, data=s.dat, family="poisson")
summary(pois_mod_sep1s)
#Optimized
pois_mod_sep3s <- bam(confirm ~ s(deforestation_shock2, bs = "cr")+ 
                       log.nl + logged.pop + logged_nonstate + 
                       t_anom + f_month5 + f_month6 + f_month7 + f_month9, 
                     data=s.dat, family="poisson")
summary(pois_mod_sep3s)
###Optimized RE
pois_mod_sep4s <- bam(confirm ~ s(deforestation_shock2, bs = "cr")+
                        log.nl + logged.pop + logged_nonstate +
                        t_anom + f_month5 + f_month6 + f_month7 + f_month9+
                        s(gid, bs = "re")+s(month, bs = "re"),
                      data=s.dat, family="poisson", method="REML")
summary(pois_mod_sep4s)


##Compare AICs
#Regular models
pois_mod_sep1$aic
pois_mod_sep2$aic
pois_mod_sep3$aic
#Adjusted AIC
pois_mod_sep1s$aic
pois_mod_sep3s$aic
pois_mod_sep4s$aic

########################
###Summary statistics###
########################
#Confirmed outbreaks 
c(min(full.2003.2019$confirm, na.rm=TRUE),median(full.2003.2019$confirm, na.rm=TRUE),mean(full.2003.2019$confirm, na.rm=TRUE),max(full.2003.2019$confirm, na.rm=TRUE),sd(full.2003.2019$confirm, na.rm=TRUE))
#deforestation_shock 
c(min(full.2003.2019$deforestation_shock2, na.rm=TRUE),median(full.2003.2019$deforestation_shock2, na.rm=TRUE),mean(full.2003.2019$deforestation_shock2, na.rm=TRUE),max(full.2003.2019$deforestation_shock2, na.rm=TRUE),sd(full.2003.2019$deforestation_shock2, na.rm=TRUE))
#log.nl 
c(min(full.2003.2019$log.nl, na.rm=TRUE),median(full.2003.2019$log.nl, na.rm=TRUE),mean(full.2003.2019$log.nl, na.rm=TRUE),max(full.2003.2019$log.nl, na.rm=TRUE),sd(full.2003.2019$log.nl, na.rm=TRUE))
#logged.pop
c(min(full.2003.2019$logged.pop, na.rm=TRUE),median(full.2003.2019$logged.pop, na.rm=TRUE),mean(full.2003.2019$logged.pop, na.rm=TRUE),max(full.2003.2019$logged.pop, na.rm=TRUE),sd(full.2003.2019$logged.pop, na.rm=TRUE))
#logged_state 
c(min(full.2003.2019$logged_state, na.rm=TRUE),median(full.2003.2019$logged_state, na.rm=TRUE),mean(full.2003.2019$logged_state, na.rm=TRUE),max(full.2003.2019$logged_state, na.rm=TRUE),sd(full.2003.2019$logged_state, na.rm=TRUE))
#logged_nonstate  
c(min(full.2003.2019$logged_nonstate, na.rm=TRUE),median(full.2003.2019$logged_nonstate, na.rm=TRUE),mean(full.2003.2019$logged_nonstate, na.rm=TRUE),max(full.2003.2019$logged_nonstate, na.rm=TRUE),sd(full.2003.2019$logged_nonstate, na.rm=TRUE))
#p_anom 
c(min(full.2003.2019$p_anom, na.rm=TRUE),median(full.2003.2019$p_anom, na.rm=TRUE),mean(full.2003.2019$p_anom, na.rm=TRUE),max(full.2003.2019$p_anom, na.rm=TRUE),sd(full.2003.2019$p_anom, na.rm=TRUE))
#spei
c(min(full.2003.2019$spei, na.rm=TRUE),median(full.2003.2019$spei, na.rm=TRUE),mean(full.2003.2019$spei, na.rm=TRUE),max(full.2003.2019$spei, na.rm=TRUE),sd(full.2003.2019$spei, na.rm=TRUE))
#t_anom 
c(min(full.2003.2019$t_anom, na.rm=TRUE),median(full.2003.2019$t_anom, na.rm=TRUE),mean(full.2003.2019$t_anom, na.rm=TRUE),max(full.2003.2019$t_anom, na.rm=TRUE),sd(full.2003.2019$t_anom, na.rm=TRUE))

